Importing necessary Libraries
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn import preprocessing
from sklearn import tree
from sklearn.manifold import TSNE
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns',None)
warnings.filterwarnings('ignore')
%matplotlib inline
Loading the train and test data residing in the project folder
data_train = pd.read_csv('/Users/harshitchandrol/Documents/SEM 2 /Advance Data Mining /Project /train.csv')
data_test = pd.read_csv('/Users/harshitchandrol/Documents/SEM 2 /Advance Data Mining /Project /test.csv')
print(data_train.shape)
(7352, 563)
print(data_test.shape)
(2947, 563)
data_train['subject'].nunique()
21
data_test['subject'].nunique()
9
the number of subject for which the observtions are captures in the train and test data are 21 and 9 respectively. Hence, we can say that the train and test data is split into 7:3 ratio.
#Duplicate Values
print('Duplicate values in train data: {}'.format(sum(data_train.duplicated())))
print('Duplicate values in test data: {}'.format(sum(data_test.duplicated())))
Duplicate values in train data: 0 Duplicate values in test data: 0
we have no duplicate observation in our train and test data. (The data is already engineered)
# Chancing for NaN and Null values
print('NaN/Null values in train data: {}'.format(data_train.isnull().sum()))
print('NaN/Null values in test data: {}'.format(data_test.isnull().sum()))
NaN/Null values in train data: tBodyAcc-mean()-X 0
tBodyAcc-mean()-Y 0
tBodyAcc-mean()-Z 0
tBodyAcc-std()-X 0
tBodyAcc-std()-Y 0
..
angle(X,gravityMean) 0
angle(Y,gravityMean) 0
angle(Z,gravityMean) 0
subject 0
Activity 0
Length: 563, dtype: int64
NaN/Null values in test data: tBodyAcc-mean()-X 0
tBodyAcc-mean()-Y 0
tBodyAcc-mean()-Z 0
tBodyAcc-std()-X 0
tBodyAcc-std()-Y 0
..
angle(X,gravityMean) 0
angle(Y,gravityMean) 0
angle(Z,gravityMean) 0
subject 0
Activity 0
Length: 563, dtype: int64
as the data is pre engineered, we dont have any missing values as well. if it had then we would have removed the missing values using '.dropna()' method.
sns.set_style('whitegrid')
plt.rcParams['font.family'] = 'Dejavu Sans'
plt.figure(figsize=(20,10))
plt.title('Data provided by each subject',fontsize=20)
sns.color_palette("pastel")
sns.countplot(x='subject',hue='Activity',data=data_train)
plt.show()
Same number of observation cn be seen by each candidate
plt.title('No of Datapoints per Activity', fontsize=15)
sns.countplot(data_train.Activity)
sns.color_palette("dark")
plt.xticks(rotation=90)
plt.show()
import plotly
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import matplotlib as mlt
# Plotting data
label_counts = data_train['Activity'].value_counts()
# Get colors
n = label_counts.shape[0]
colormap = plt.get_cmap('viridis')
colors = [mlt.colors.to_hex(colormap(col)) for col in np.arange(0, 1.01, 1/(n-1))]
# Create plot
data = go.Bar(x = label_counts.index,
y = label_counts,
marker = dict(color = colors))
layout = go.Layout(title = 'Smartphone Activity Distribution',
xaxis = dict(title = 'Activity'),
yaxis = dict(title = 'Count'))
fig = go.Figure(data=[data], layout=layout)
fig.show()
#iplot(fig)
Static and Dynamic Activities
In static activities (sit, stand, lie down) motion information will not be very useful. In the dynamic activities (Walking, WalkingUpstairs,WalkingDownstairs) motion info will be significa
sns.set_palette("Set1", desat=0.80)
facetgrid = sns.FacetGrid(data_train, hue='Activity', height=5,aspect=2)
facetgrid.map(sns.distplot,'tBodyAccMag-mean()', hist=False)\
.add_legend()
plt.annotate("Stationary Activities", xy=(-0.960,12), xytext=(-0.5, 15), size=20,\
va='center', ha='left',\
arrowprops=dict(arrowstyle="simple",connectionstyle="arc3,rad=0.1"))
plt.annotate("Moving Activities", xy=(0,3), xytext=(0.2, 9), size=20,\
va='center', ha='left',\
arrowprops=dict(arrowstyle="simple",connectionstyle="arc3,rad=0.1"))
plt.show()
As aspected from most real world data, when participants are moving the data is normally distributed with some long tail.
# for plotting purposes taking datapoints of each activity to a different dataframe
df1 = data_train[data_train['Activity']=='STANDING']
df2 = data_train[data_train['Activity']=='SITTING']
df3 = data_train[data_train['Activity']=='LAYING']
df4 = data_train[data_train['Activity']=='WALKING']
df5 = data_train[data_train['Activity']=='WALKING_DOWNSTAIRS']
df6 = data_train[data_train['Activity']=='WALKING_UPSTAIRS']
plt.figure(figsize=(20,7))
plt.subplot(2,2,1)
plt.title('Stationary Activities(Zoomed in)')
sns.distplot(df4['tBodyAccMag-mean()'],color = 'r',hist = False, label = 'SITTING')
sns.distplot(df5['tBodyAccMag-mean()'],color = 'm',hist = False,label = 'STANDING')
sns.distplot(df6['tBodyAccMag-mean()'],color = 'c',hist = False, label = 'LAYING')
plt.axis([-1.08, -0.2, 0.1, 20])
plt.legend(loc='center')
plt.subplot(2,2,2)
plt.title('Moving Activities')
sns.distplot(df1['tBodyAccMag-mean()'],color = 'red',hist = False, label = 'WALKING')
sns.distplot(df2['tBodyAccMag-mean()'],color = 'blue',hist = False,label = 'WALING UP')
sns.distplot(df3['tBodyAccMag-mean()'],color = 'green',hist = False, label = 'WALKING DOWN')
plt.legend(loc='center right')
plt.tight_layout()
plt.show()
plt.figure(figsize=(7,5))
sns.boxplot(x='Activity', y='tBodyAccMag-mean()',data=data_train, showfliers=False, saturation=1)
plt.ylabel('Acceleration Magnitude mean')
plt.axhline(y=-0.7, xmin=0.1, xmax=0.9,dashes=(5,5), c='b')
plt.axhline(y=-0.05, xmin=0.4, dashes=(5,5), c='g')
plt.xticks(rotation=90)
plt.show()
If tAccMean is < -0.8 then the Activities are either Standing or Sitting or Laying.
If tAccMean is > -0.6 then the Activities are either Walking or WalkingDownstairs or WalkingUpstairs.
If tAccMean > 0.0 then the Activity is WalkingDownstairs.
We can classify 75% the Acitivity labels with some errors.
sns.boxplot(x='Activity', y='angle(X,gravityMean)', data=data_train)
plt.axhline(y=0.08, xmin=0.1, xmax=0.9,c='m',dashes=(5,3))
plt.title('Angle between X-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.show()
If angleX,gravityMean > 0 then Activity is Laying.
We can classify all datapoints belonging to Laying activity with just a single if else statement with Grvity mean in 2 or 3 (X,Y or Z) dimensions.
sns.boxplot(x='Activity', y='angle(Y,gravityMean)', data = data_train, showfliers=False)
plt.title('Angle between Y-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.axhline(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5,3), c='m')
plt.show()
sns.boxplot(x='Activity', y='angle(Z,gravityMean)', data = data_train, showfliers=False)
plt.title('Angle between Z-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.axhline(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5,3), c='m')
plt.show()
The dataset is geared towards classifying the activity of the participant. Let us investigate the separability of the classes.
# performs t-sne with different perplexity values and their repective plots..
def perform_tsne(X_data, y_data, perplexities, n_iter=1000, img_name_prefix='t-sne'):
for index,perplexity in enumerate(perplexities):
# perform t-sne
print('\nperforming tsne with perplexity {} and with {} iterations at max'.format(perplexity, n_iter))
X_reduced = TSNE(verbose=2, perplexity=perplexity).fit_transform(X_data)
print('Done..')
# prepare the data for seaborn
print('Creating plot for this t-sne visualization..')
df = pd.DataFrame({'x':X_reduced[:,0], 'y':X_reduced[:,1] ,'label':y_data})
# draw the plot in appropriate place in the grid
sns.lmplot(data=df, x='x', y='y', hue='label', fit_reg=False, height=8,\
palette="Set1",markers=['^','v','s','o', '1','2'])
plt.title("perplexity : {} and max_iter : {}".format(perplexity, n_iter))
img_name = img_name_prefix + '_perp_{}_iter_{}.png'.format(perplexity, n_iter)
print('saving this plot as image in present working directory...')
plt.savefig(img_name)
plt.show()
print('Done')
X_pre_tsne = data_train.drop(['subject', 'Activity'], axis=1)
y_pre_tsne = data_train['Activity']
perform_tsne(X_data = X_pre_tsne,y_data=y_pre_tsne, perplexities =[2,5,10,20,50])
performing tsne with perplexity 2 and with 1000 iterations at max [t-SNE] Computing 7 nearest neighbors... [t-SNE] Indexed 7352 samples in 0.001s... [t-SNE] Computed neighbors for 7352 samples in 0.966s... [t-SNE] Computed conditional probabilities for sample 1000 / 7352 [t-SNE] Computed conditional probabilities for sample 2000 / 7352 [t-SNE] Computed conditional probabilities for sample 3000 / 7352 [t-SNE] Computed conditional probabilities for sample 4000 / 7352 [t-SNE] Computed conditional probabilities for sample 5000 / 7352 [t-SNE] Computed conditional probabilities for sample 6000 / 7352 [t-SNE] Computed conditional probabilities for sample 7000 / 7352 [t-SNE] Computed conditional probabilities for sample 7352 / 7352 [t-SNE] Mean sigma: 0.597443 [t-SNE] Computed conditional probabilities in 0.016s [t-SNE] Iteration 50: error = 124.6806946, gradient norm = 0.0277756 (50 iterations in 0.874s) [t-SNE] Iteration 100: error = 106.6843567, gradient norm = 0.0275516 (50 iterations in 0.686s) [t-SNE] Iteration 150: error = 100.5849457, gradient norm = 0.0193792 (50 iterations in 0.577s) [t-SNE] Iteration 200: error = 97.2667542, gradient norm = 0.0154741 (50 iterations in 0.558s) [t-SNE] Iteration 250: error = 95.0155640, gradient norm = 0.0142860 (50 iterations in 0.564s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 95.015564 [t-SNE] Iteration 300: error = 4.1172471, gradient norm = 0.0015668 (50 iterations in 0.558s) [t-SNE] Iteration 350: error = 3.2064433, gradient norm = 0.0010139 (50 iterations in 0.565s) [t-SNE] Iteration 400: error = 2.7765982, gradient norm = 0.0007163 (50 iterations in 0.583s) [t-SNE] Iteration 450: error = 2.5120106, gradient norm = 0.0005610 (50 iterations in 0.560s) [t-SNE] Iteration 500: error = 2.3279648, gradient norm = 0.0004774 (50 iterations in 0.581s) [t-SNE] Iteration 550: error = 2.1895463, gradient norm = 0.0004144 (50 iterations in 0.577s) [t-SNE] Iteration 600: error = 2.0799561, gradient norm = 0.0003655 (50 iterations in 0.611s) [t-SNE] Iteration 650: error = 1.9897580, gradient norm = 0.0003316 (50 iterations in 0.601s) [t-SNE] Iteration 700: error = 1.9139977, gradient norm = 0.0003000 (50 iterations in 0.627s) [t-SNE] Iteration 750: error = 1.8488334, gradient norm = 0.0002775 (50 iterations in 0.611s) [t-SNE] Iteration 800: error = 1.7920055, gradient norm = 0.0002585 (50 iterations in 0.617s) [t-SNE] Iteration 850: error = 1.7420161, gradient norm = 0.0002386 (50 iterations in 0.610s) [t-SNE] Iteration 900: error = 1.6971433, gradient norm = 0.0002239 (50 iterations in 0.603s) [t-SNE] Iteration 950: error = 1.6568744, gradient norm = 0.0002103 (50 iterations in 0.616s) [t-SNE] Iteration 1000: error = 1.6207337, gradient norm = 0.0001988 (50 iterations in 0.616s) [t-SNE] KL divergence after 1000 iterations: 1.620734 Done.. Creating plot for this t-sne visualization.. saving this plot as image in present working directory...
Done performing tsne with perplexity 5 and with 1000 iterations at max [t-SNE] Computing 16 nearest neighbors... [t-SNE] Indexed 7352 samples in 0.001s... [t-SNE] Computed neighbors for 7352 samples in 0.921s... [t-SNE] Computed conditional probabilities for sample 1000 / 7352 [t-SNE] Computed conditional probabilities for sample 2000 / 7352 [t-SNE] Computed conditional probabilities for sample 3000 / 7352 [t-SNE] Computed conditional probabilities for sample 4000 / 7352 [t-SNE] Computed conditional probabilities for sample 5000 / 7352 [t-SNE] Computed conditional probabilities for sample 6000 / 7352 [t-SNE] Computed conditional probabilities for sample 7000 / 7352 [t-SNE] Computed conditional probabilities for sample 7352 / 7352 [t-SNE] Mean sigma: 0.961446 [t-SNE] Computed conditional probabilities in 0.021s [t-SNE] Iteration 50: error = 113.9493713, gradient norm = 0.0213829 (50 iterations in 0.947s) [t-SNE] Iteration 100: error = 97.4634094, gradient norm = 0.0167576 (50 iterations in 0.687s) [t-SNE] Iteration 150: error = 93.0681076, gradient norm = 0.0095275 (50 iterations in 0.685s) [t-SNE] Iteration 200: error = 91.1163483, gradient norm = 0.0069787 (50 iterations in 0.583s) [t-SNE] Iteration 250: error = 89.9588394, gradient norm = 0.0051896 (50 iterations in 0.621s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 89.958839 [t-SNE] Iteration 300: error = 3.5685205, gradient norm = 0.0014601 (50 iterations in 0.595s) [t-SNE] Iteration 350: error = 2.8102651, gradient norm = 0.0007495 (50 iterations in 0.579s) [t-SNE] Iteration 400: error = 2.4290500, gradient norm = 0.0005268 (50 iterations in 0.589s) [t-SNE] Iteration 450: error = 2.2122097, gradient norm = 0.0004087 (50 iterations in 0.593s) [t-SNE] Iteration 500: error = 2.0680881, gradient norm = 0.0003304 (50 iterations in 0.634s) [t-SNE] Iteration 550: error = 1.9632096, gradient norm = 0.0002831 (50 iterations in 0.658s) [t-SNE] Iteration 600: error = 1.8820696, gradient norm = 0.0002470 (50 iterations in 0.652s) [t-SNE] Iteration 650: error = 1.8171486, gradient norm = 0.0002196 (50 iterations in 0.646s) [t-SNE] Iteration 700: error = 1.7634370, gradient norm = 0.0001973 (50 iterations in 0.654s) [t-SNE] Iteration 750: error = 1.7180336, gradient norm = 0.0001823 (50 iterations in 0.672s) [t-SNE] Iteration 800: error = 1.6792035, gradient norm = 0.0001666 (50 iterations in 0.690s) [t-SNE] Iteration 850: error = 1.6456238, gradient norm = 0.0001521 (50 iterations in 0.713s) [t-SNE] Iteration 900: error = 1.6160247, gradient norm = 0.0001421 (50 iterations in 0.706s) [t-SNE] Iteration 950: error = 1.5897765, gradient norm = 0.0001339 (50 iterations in 0.682s) [t-SNE] Iteration 1000: error = 1.5662081, gradient norm = 0.0001271 (50 iterations in 0.690s) [t-SNE] KL divergence after 1000 iterations: 1.566208 Done.. Creating plot for this t-sne visualization.. saving this plot as image in present working directory...
Done performing tsne with perplexity 10 and with 1000 iterations at max [t-SNE] Computing 31 nearest neighbors... [t-SNE] Indexed 7352 samples in 0.001s... [t-SNE] Computed neighbors for 7352 samples in 0.943s... [t-SNE] Computed conditional probabilities for sample 1000 / 7352 [t-SNE] Computed conditional probabilities for sample 2000 / 7352 [t-SNE] Computed conditional probabilities for sample 3000 / 7352 [t-SNE] Computed conditional probabilities for sample 4000 / 7352 [t-SNE] Computed conditional probabilities for sample 5000 / 7352 [t-SNE] Computed conditional probabilities for sample 6000 / 7352 [t-SNE] Computed conditional probabilities for sample 7000 / 7352 [t-SNE] Computed conditional probabilities for sample 7352 / 7352 [t-SNE] Mean sigma: 1.133827 [t-SNE] Computed conditional probabilities in 0.038s [t-SNE] Iteration 50: error = 106.0404510, gradient norm = 0.0164523 (50 iterations in 0.918s) [t-SNE] Iteration 100: error = 90.9416046, gradient norm = 0.0103236 (50 iterations in 0.843s) [t-SNE] Iteration 150: error = 87.7861252, gradient norm = 0.0074197 (50 iterations in 0.758s) [t-SNE] Iteration 200: error = 86.4843750, gradient norm = 0.0050606 (50 iterations in 0.732s) [t-SNE] Iteration 250: error = 85.7295151, gradient norm = 0.0029502 (50 iterations in 0.738s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 85.729515 [t-SNE] Iteration 300: error = 3.1488535, gradient norm = 0.0013967 (50 iterations in 0.696s) [t-SNE] Iteration 350: error = 2.5036268, gradient norm = 0.0006513 (50 iterations in 0.696s) [t-SNE] Iteration 400: error = 2.1824427, gradient norm = 0.0004233 (50 iterations in 0.692s) [t-SNE] Iteration 450: error = 1.9976087, gradient norm = 0.0003158 (50 iterations in 0.604s) [t-SNE] Iteration 500: error = 1.8784996, gradient norm = 0.0002529 (50 iterations in 0.622s) [t-SNE] Iteration 550: error = 1.7943295, gradient norm = 0.0002119 (50 iterations in 0.616s) [t-SNE] Iteration 600: error = 1.7313924, gradient norm = 0.0001855 (50 iterations in 0.625s) [t-SNE] Iteration 650: error = 1.6817471, gradient norm = 0.0001620 (50 iterations in 0.627s) [t-SNE] Iteration 700: error = 1.6421925, gradient norm = 0.0001455 (50 iterations in 0.624s) [t-SNE] Iteration 750: error = 1.6096678, gradient norm = 0.0001317 (50 iterations in 0.643s) [t-SNE] Iteration 800: error = 1.5822169, gradient norm = 0.0001194 (50 iterations in 0.633s) [t-SNE] Iteration 850: error = 1.5590074, gradient norm = 0.0001108 (50 iterations in 0.612s) [t-SNE] Iteration 900: error = 1.5387011, gradient norm = 0.0001041 (50 iterations in 0.630s) [t-SNE] Iteration 950: error = 1.5213611, gradient norm = 0.0000998 (50 iterations in 0.628s) [t-SNE] Iteration 1000: error = 1.5065804, gradient norm = 0.0000931 (50 iterations in 0.611s) [t-SNE] KL divergence after 1000 iterations: 1.506580 Done.. Creating plot for this t-sne visualization.. saving this plot as image in present working directory...
Done performing tsne with perplexity 20 and with 1000 iterations at max [t-SNE] Computing 61 nearest neighbors... [t-SNE] Indexed 7352 samples in 0.001s... [t-SNE] Computed neighbors for 7352 samples in 0.952s... [t-SNE] Computed conditional probabilities for sample 1000 / 7352 [t-SNE] Computed conditional probabilities for sample 2000 / 7352 [t-SNE] Computed conditional probabilities for sample 3000 / 7352 [t-SNE] Computed conditional probabilities for sample 4000 / 7352 [t-SNE] Computed conditional probabilities for sample 5000 / 7352 [t-SNE] Computed conditional probabilities for sample 6000 / 7352 [t-SNE] Computed conditional probabilities for sample 7000 / 7352 [t-SNE] Computed conditional probabilities for sample 7352 / 7352 [t-SNE] Mean sigma: 1.274336 [t-SNE] Computed conditional probabilities in 0.076s [t-SNE] Iteration 50: error = 97.8272934, gradient norm = 0.0162839 (50 iterations in 0.912s) [t-SNE] Iteration 100: error = 84.1415939, gradient norm = 0.0066233 (50 iterations in 0.728s) [t-SNE] Iteration 150: error = 82.0585632, gradient norm = 0.0036928 (50 iterations in 0.652s) [t-SNE] Iteration 200: error = 81.2350388, gradient norm = 0.0026874 (50 iterations in 0.726s) [t-SNE] Iteration 250: error = 80.8051758, gradient norm = 0.0017583 (50 iterations in 0.667s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 80.805176 [t-SNE] Iteration 300: error = 2.7084403, gradient norm = 0.0013095 (50 iterations in 0.632s) [t-SNE] Iteration 350: error = 2.1716480, gradient norm = 0.0005794 (50 iterations in 0.642s) [t-SNE] Iteration 400: error = 1.9208679, gradient norm = 0.0003482 (50 iterations in 0.609s) [t-SNE] Iteration 450: error = 1.7743288, gradient norm = 0.0002481 (50 iterations in 0.638s) [t-SNE] Iteration 500: error = 1.6805768, gradient norm = 0.0001940 (50 iterations in 0.666s) [t-SNE] Iteration 550: error = 1.6164960, gradient norm = 0.0001568 (50 iterations in 0.647s) [t-SNE] Iteration 600: error = 1.5700113, gradient norm = 0.0001337 (50 iterations in 0.651s) [t-SNE] Iteration 650: error = 1.5345597, gradient norm = 0.0001171 (50 iterations in 0.655s) [t-SNE] Iteration 700: error = 1.5068108, gradient norm = 0.0001056 (50 iterations in 0.644s) [t-SNE] Iteration 750: error = 1.4846615, gradient norm = 0.0000961 (50 iterations in 0.627s) [t-SNE] Iteration 800: error = 1.4667996, gradient norm = 0.0000901 (50 iterations in 0.618s) [t-SNE] Iteration 850: error = 1.4523740, gradient norm = 0.0000836 (50 iterations in 0.622s) [t-SNE] Iteration 900: error = 1.4403598, gradient norm = 0.0000787 (50 iterations in 0.628s) [t-SNE] Iteration 950: error = 1.4302145, gradient norm = 0.0000762 (50 iterations in 0.630s) [t-SNE] Iteration 1000: error = 1.4214487, gradient norm = 0.0000730 (50 iterations in 0.631s) [t-SNE] KL divergence after 1000 iterations: 1.421449 Done.. Creating plot for this t-sne visualization.. saving this plot as image in present working directory...
Done performing tsne with perplexity 50 and with 1000 iterations at max [t-SNE] Computing 151 nearest neighbors... [t-SNE] Indexed 7352 samples in 0.001s... [t-SNE] Computed neighbors for 7352 samples in 1.051s... [t-SNE] Computed conditional probabilities for sample 1000 / 7352 [t-SNE] Computed conditional probabilities for sample 2000 / 7352 [t-SNE] Computed conditional probabilities for sample 3000 / 7352 [t-SNE] Computed conditional probabilities for sample 4000 / 7352 [t-SNE] Computed conditional probabilities for sample 5000 / 7352 [t-SNE] Computed conditional probabilities for sample 6000 / 7352 [t-SNE] Computed conditional probabilities for sample 7000 / 7352 [t-SNE] Computed conditional probabilities for sample 7352 / 7352 [t-SNE] Mean sigma: 1.437672 [t-SNE] Computed conditional probabilities in 0.185s [t-SNE] Iteration 50: error = 86.5481644, gradient norm = 0.0213117 (50 iterations in 0.950s) [t-SNE] Iteration 100: error = 75.5884323, gradient norm = 0.0042653 (50 iterations in 0.832s) [t-SNE] Iteration 150: error = 74.6465530, gradient norm = 0.0025135 (50 iterations in 0.729s) [t-SNE] Iteration 200: error = 74.2947617, gradient norm = 0.0014695 (50 iterations in 0.727s) [t-SNE] Iteration 250: error = 74.1211166, gradient norm = 0.0013156 (50 iterations in 0.774s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 74.121117 [t-SNE] Iteration 300: error = 2.1523256, gradient norm = 0.0011806 (50 iterations in 0.737s) [t-SNE] Iteration 350: error = 1.7555063, gradient norm = 0.0004877 (50 iterations in 0.707s) [t-SNE] Iteration 400: error = 1.5864614, gradient norm = 0.0002822 (50 iterations in 0.695s) [t-SNE] Iteration 450: error = 1.4929533, gradient norm = 0.0001889 (50 iterations in 0.701s) [t-SNE] Iteration 500: error = 1.4331270, gradient norm = 0.0001403 (50 iterations in 0.716s) [t-SNE] Iteration 550: error = 1.3917806, gradient norm = 0.0001125 (50 iterations in 0.732s) [t-SNE] Iteration 600: error = 1.3625529, gradient norm = 0.0000948 (50 iterations in 0.721s) [t-SNE] Iteration 650: error = 1.3411245, gradient norm = 0.0000826 (50 iterations in 0.703s) [t-SNE] Iteration 700: error = 1.3254485, gradient norm = 0.0000744 (50 iterations in 0.698s) [t-SNE] Iteration 750: error = 1.3139782, gradient norm = 0.0000698 (50 iterations in 0.722s) [t-SNE] Iteration 800: error = 1.3053586, gradient norm = 0.0000627 (50 iterations in 0.718s) [t-SNE] Iteration 850: error = 1.2985430, gradient norm = 0.0000643 (50 iterations in 0.759s) [t-SNE] Iteration 900: error = 1.2934688, gradient norm = 0.0000586 (50 iterations in 0.721s) [t-SNE] Iteration 950: error = 1.2890513, gradient norm = 0.0000548 (50 iterations in 0.733s) [t-SNE] Iteration 1000: error = 1.2848508, gradient norm = 0.0000534 (50 iterations in 0.690s) [t-SNE] KL divergence after 1000 iterations: 1.284851 Done.. Creating plot for this t-sne visualization.. saving this plot as image in present working directory...
Done
We can clearly see from the TSNE cluster, All the Activities can be cleanly seperated except "Standing" and "Sitting".
data_whole = pd.concat([data_train,data_test]) # combining both train and test data
data_whole= data_train.sample(frac=1) #The sample() method returns a specified number of random rows.
f_data = data_whole.loc[:,data_whole.columns.str.contains('^f|^s|^a|^A')]
t_data = data_whole.loc[:,data_whole.columns.str.contains('^t|^s|^a|^A')]
X = data_whole.drop(['subject','Activity'],axis=1) # dropping subject and activity column
y = data_whole['Activity'] # getting only label as y
X_f = f_data.drop(['subject','Activity'],axis=1) # taking frequency components
y_f = f_data['Activity']
X_t = t_data.drop(['subject','Activity'],axis=1) # taking time components
y_t = t_data['Activity']
#intializing PCA
pca = PCA(n_components=0.99)
pca.fit(X)
X_reduced = pca.transform(X)
#printing the #numbers
print('Frequency components are {} Time components are {}'.format(X_f.shape[1],X.shape[1]-X_f.shape[1]))
print('Original components are {} Reduced components are {}'.format(X.shape[1],X_reduced.shape[1]))
#then splitting out data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,
shuffle=True,random_state=444) # splitting original data
X_train_pca,X_test_pca,y_train_pca,y_test_pca = train_test_split(X_reduced,y,test_size=0.2,
shuffle=True,random_state=444) # splitting pca data
X_train_f,X_test_f,y_train_f,y_test_f = train_test_split(X_f,y_f,test_size=0.2,
shuffle=True,
random_state=444) # splitting frequency components data
X_train_t,X_test_t,y_train_t,y_test_t = train_test_split(X_t,y_t,test_size=0.2,
shuffle=True,
random_state=444) # splitting time components data
Frequency components are 296 Time components are 265 Original components are 561 Reduced components are 155
# checking the shape of original split data
print('X_train and y_train : ({},{})'.format(X_train.shape, y_train.shape))
print('X_test and y_test : ({},{})'.format(X_test.shape, y_test.shape))
X_train and y_train : ((5881, 561),(5881,)) X_test and y_test : ((1471, 561),(1471,))
Labels for confusion Matirx
labels=['LAYING', 'SITTING','STANDING','WALKING','WALKING_DOWNSTAIRS','WALKING_UPSTAIRS']
Function to plot the confusion matrix
plt.rcParams['font.family'] = 'sans-serif'
def plot_confusion_matrix(cm,classes,normalize=False,title='Confusion Matrix',cmap=plt.cm.RdYlGn):
if normalize:
clm=cm.astype('float')/cm.sum(axis=1)[:,np.newaxis]
plt.imshow(clm,interpolation='nearest',cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks,classes,rotation=90)
plt.yticks(tick_marks,classes)
fmt= '.2f' if normalize else 'd'
thresh = cm.max()/2.
for i in range (cm.shape[0]):
for j in range (cm.shape[1]):
plt.text(j,i,format(clm[i,j],fmt),horizontalalignment='center',color='white' if clm[i,j] > thresh
else 'black')
plt.tight_layout()
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
Function to run models
from datetime import datetime
def perform_model(model,X_train,y_train,X_test,y_test,class_labels,cm_normalize=True,
print_cm=True,cm_map=plt.cm.Greens):
# creating a dictionary to store various results
results = dict()
#time at which model stat runnig
train_start_time = datetime.now()
print('training the model ...')
model.fit(X_train,y_train)
print('Done..!\n')
train_end_time = datetime.now()
results['training_time'] = train_end_time - train_start_time
print('--> training time -{}\n'.format(results['training_time']))
#pridicting test data
print('Predicting test data')
test_start_time = datetime.now()
y_pred = model.predict(X_test)
test_end_time = datetime.now()
print('Done..!\n')
results['testing_time'] = test_end_time-test_start_time
print('--> testing time -{}'.format(results['testing_time']))
# calculating overall accuracy of the model
accuracy = metrics.accuracy_score(y_true=y_test,y_pred=y_pred)
#store accuracy in results
results['accuracy'] = accuracy
print('--> accuracy -{}\n'.format(accuracy))
#confusion matrix
cm = metrics.confusion_matrix(y_test,y_pred)
results['confusion_matrix'] = cm
if print_cm:
print('\n -------------Confusion Matirx---------------')
print('\n{}'.format(cm))
#plot confusion matrix
plt.figure(figsize=(8,8))
plt.grid(b=False)
plot_confusion_matrix(cm,classes=class_labels,normalize=True,title='Normalized Comfusion Matrix',cmap=cm_map)
plt.show()
# getting classification report
print('--------------------| Classification Report |-------------------')
classification_report = metrics.classification_report(y_test,y_pred)
# storing report in results
results['Classification_report'] = classification_report
print(classification_report)
# adding the trained model to the results
results['Model'] = model
return results
Function to print the grid search parameters
def print_grid_search_attributes(model):
#Estimator that have highest score from all the estimators
print('\n\n--> Best Estimator:')
print('\t{}\n'.format(model.best_estimator_))
#best result parameter
print('\n--> Best Parameter')
print('\tBest Estimator Parameter :{}'.format(model.best_params_))
# number of cross validation split
print('\n Number of CrossValidation sets:')
print('\t Total number of cross validation sets: {}'.format(model.n_splits_))
#Average cross vaidation score of the best estimator
print('\n--> Best Score:')
print('\tAverage Cross Validation score of best estimator: {}'.format(model.best_score_))
Logistic Regression with Grid Search
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=ConvergenceWarning)
from sklearn.model_selection import GridSearchCV
# starting the grid
param_grid = {'C':[0.01,0.1,1,10,20,30],'penalty':['l2','l1']}
log_reg = LogisticRegression()
log_reg_grid = GridSearchCV(log_reg,param_grid=param_grid,cv=3,verbose=1,n_jobs=-1)
log_reg_grid_results= perform_model(log_reg_grid,X_train,y_train,X_test,y_test,class_labels=labels)
# cheking the attributes of the modelfi
print_grid_search_attributes(log_reg_grid_results['Model'])
training the model ... Fitting 3 folds for each of 12 candidates, totalling 36 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Done..! --> training time -0:00:05.934872 Predicting test data Done..! --> testing time -0:00:00.003633 --> accuracy -0.9789259007477906 -------------Confusion Matirx--------------- [[304 0 0 0 0 1] [ 1 252 14 0 0 1] [ 0 9 275 0 0 0] [ 0 0 0 217 0 1] [ 0 0 0 1 194 1] [ 0 0 0 1 1 198]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.97 0.94 0.95 268
STANDING 0.95 0.97 0.96 284
WALKING 0.99 1.00 0.99 218
WALKING_DOWNSTAIRS 0.99 0.99 0.99 196
WALKING_UPSTAIRS 0.98 0.99 0.99 200
accuracy 0.98 1471
macro avg 0.98 0.98 0.98 1471
weighted avg 0.98 0.98 0.98 1471
--> Best Estimator:
LogisticRegression(C=10)
--> Best Parameter
Best Estimator Parameter :{'C': 10, 'penalty': 'l2'}
Number of CrossValidation sets:
Total number of cross validation sets: 3
--> Best Score:
Average Cross Validation score of best estimator: 0.9806156271789694
Linear SVC
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc = LinearSVC(tol=0.00005)
lr_svc_grid = GridSearchCV(lr_svc, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_results = perform_model(lr_svc_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(lr_svc_grid_results['Model'])
training the model ... Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
Done..! --> training time -0:00:12.230188 Predicting test data Done..! --> testing time -0:00:00.003524 --> accuracy -0.9857239972807614 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 253 15 0 0 0] [ 0 6 278 0 0 0] [ 0 0 0 218 0 0] [ 0 0 0 0 196 0] [ 0 0 0 0 0 200]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.98 0.94 0.96 268
STANDING 0.95 0.98 0.96 284
WALKING 1.00 1.00 1.00 218
WALKING_DOWNSTAIRS 1.00 1.00 1.00 196
WALKING_UPSTAIRS 1.00 1.00 1.00 200
accuracy 0.99 1471
macro avg 0.99 0.99 0.99 1471
weighted avg 0.99 0.99 0.99 1471
--> Best Estimator:
LinearSVC(C=1, tol=5e-05)
--> Best Parameter
Best Estimator Parameter :{'C': 1}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9855468185574994
Kernal SVM
from sklearn.svm import SVC
parameters = {'C':[2,8,16],\
'gamma': [ 0.0078125, 0.125, 2]}
rbf_svm = SVC(kernel='rbf')
rbf_svm_grid = GridSearchCV(rbf_svm,param_grid=parameters, n_jobs=-1)
rbf_svm_grid_results = perform_model(rbf_svm_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(rbf_svm_grid_results['Model'])
training the model ... Done..! --> training time -0:01:18.655584 Predicting test data Done..! --> testing time -0:00:00.399466 --> accuracy -0.9898028552005439 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 260 7 0 0 1] [ 0 7 277 0 0 0] [ 0 0 0 218 0 0] [ 0 0 0 0 196 0] [ 0 0 0 0 0 200]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.97 0.97 0.97 268
STANDING 0.98 0.98 0.98 284
WALKING 1.00 1.00 1.00 218
WALKING_DOWNSTAIRS 1.00 1.00 1.00 196
WALKING_UPSTAIRS 1.00 1.00 1.00 200
accuracy 0.99 1471
macro avg 0.99 0.99 0.99 1471
weighted avg 0.99 0.99 0.99 1471
--> Best Estimator:
SVC(C=16, gamma=0.0078125)
--> Best Parameter
Best Estimator Parameter :{'C': 16, 'gamma': 0.0078125}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9870774308023975
Decision Tree with Grid Search
parameters = {'max_depth':np.arange(3,10,2)}
dt = DecisionTreeClassifier()
dt_grid = GridSearchCV(dt,param_grid=parameters, n_jobs=-1)
dt_grid_results = perform_model(dt_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(dt_grid_results['Model'])
training the model ... Done..! --> training time -0:00:04.795516 Predicting test data Done..! --> testing time -0:00:00.003643 --> accuracy -0.9401767505098573 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 247 20 1 0 0] [ 0 21 263 0 0 0] [ 0 0 0 204 5 9] [ 0 0 0 6 182 8] [ 0 0 0 9 9 182]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.92 0.92 0.92 268
STANDING 0.93 0.93 0.93 284
WALKING 0.93 0.94 0.93 218
WALKING_DOWNSTAIRS 0.93 0.93 0.93 196
WALKING_UPSTAIRS 0.91 0.91 0.91 200
accuracy 0.94 1471
macro avg 0.94 0.94 0.94 1471
weighted avg 0.94 0.94 0.94 1471
--> Best Estimator:
DecisionTreeClassifier(max_depth=9)
--> Best Parameter
Best Estimator Parameter :{'max_depth': 9}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9420164837387801
Random Forest Classifier
params = {'n_estimators': np.arange(10,201,20), 'max_depth':np.arange(3,15,2)}
rfc = RandomForestClassifier()
rfc_grid = GridSearchCV(rfc, param_grid=params, n_jobs=-1)
rfc_grid_results = perform_model(rfc_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(rfc_grid_results['Model'])
training the model ... Done..! --> training time -0:02:17.285373 Predicting test data Done..! --> testing time -0:00:00.029575 --> accuracy -0.9768864717878993 -------------Confusion Matirx--------------- [[304 0 0 0 0 1] [ 0 257 10 0 0 1] [ 0 12 272 0 0 0] [ 0 0 0 213 2 3] [ 0 0 0 0 192 4] [ 0 0 0 0 1 199]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.96 0.96 0.96 268
STANDING 0.96 0.96 0.96 284
WALKING 1.00 0.98 0.99 218
WALKING_DOWNSTAIRS 0.98 0.98 0.98 196
WALKING_UPSTAIRS 0.96 0.99 0.98 200
accuracy 0.98 1471
macro avg 0.98 0.98 0.98 1471
weighted avg 0.98 0.98 0.98 1471
--> Best Estimator:
RandomForestClassifier(max_depth=13, n_estimators=150)
--> Best Parameter
Best Estimator Parameter :{'max_depth': 13, 'n_estimators': 150}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9787448199330709
Gradient Boosted Decision Trees With GridSearch
from sklearn.ensemble import GradientBoostingClassifier
param_grid = {'max_depth': np.arange(5),'n_estimators':np.arange(140)}
gbdt = GradientBoostingClassifier()
gbdt_grid = GridSearchCV(gbdt, param_grid=param_grid, n_jobs=-1)
gbdt_grid_results = perform_model(gbdt_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(gbdt_grid_results['Model'])
training the model ...
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Input In [31], in <cell line: 5>() 3 gbdt = GradientBoostingClassifier() 4 gbdt_grid = GridSearchCV(gbdt, param_grid=param_grid, n_jobs=-1) ----> 5 gbdt_grid_results = perform_model(gbdt_grid, X_train, y_train, X_test, y_test, class_labels=labels) 7 # observe the attributes of the model 8 print_grid_search_attributes(gbdt_grid_results['Model']) Input In [24], in perform_model(model, X_train, y_train, X_test, y_test, class_labels, cm_normalize, print_cm, cm_map) 8 train_start_time = datetime.now() 9 print('training the model ...') ---> 10 model.fit(X_train,y_train) 11 print('Done..!\n') 12 train_end_time = datetime.now() File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:891, in BaseSearchCV.fit(self, X, y, groups, **fit_params) 885 results = self._format_results( 886 all_candidate_params, n_splits, all_out, all_more_results 887 ) 889 return results --> 891 self._run_search(evaluate_candidates) 893 # multimetric is determined here because in the case of a callable 894 # self.scoring the return type is only known after calling 895 first_test_score = all_out[0]["test_scores"] File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:1392, in GridSearchCV._run_search(self, evaluate_candidates) 1390 def _run_search(self, evaluate_candidates): 1391 """Search all candidates in param_grid""" -> 1392 evaluate_candidates(ParameterGrid(self.param_grid)) File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:838, in BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results) 830 if self.verbose > 0: 831 print( 832 "Fitting {0} folds for each of {1} candidates," 833 " totalling {2} fits".format( 834 n_splits, n_candidates, n_candidates * n_splits 835 ) 836 ) --> 838 out = parallel( 839 delayed(_fit_and_score)( 840 clone(base_estimator), 841 X, 842 y, 843 train=train, 844 test=test, 845 parameters=parameters, 846 split_progress=(split_idx, n_splits), 847 candidate_progress=(cand_idx, n_candidates), 848 **fit_and_score_kwargs, 849 ) 850 for (cand_idx, parameters), (split_idx, (train, test)) in product( 851 enumerate(candidate_params), enumerate(cv.split(X, y, groups)) 852 ) 853 ) 855 if len(out) < 1: 856 raise ValueError( 857 "No fits were performed. " 858 "Was the CV iterator empty? " 859 "Were there no candidates?" 860 ) File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/parallel.py:1056, in Parallel.__call__(self, iterable) 1053 self._iterating = False 1055 with self._backend.retrieval_context(): -> 1056 self.retrieve() 1057 # Make sure that we get a last message telling us we are done 1058 elapsed_time = time.time() - self._start_time File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/parallel.py:935, in Parallel.retrieve(self) 933 try: 934 if getattr(self._backend, 'supports_timeout', False): --> 935 self._output.extend(job.get(timeout=self.timeout)) 936 else: 937 self._output.extend(job.get()) File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py:542, in LokyBackend.wrap_future_result(future, timeout) 539 """Wrapper for Future.result to implement the same behaviour as 540 AsyncResults.get from multiprocessing.""" 541 try: --> 542 return future.result(timeout=timeout) 543 except CfTimeoutError as e: 544 raise TimeoutError from e File ~/opt/anaconda3/lib/python3.9/concurrent/futures/_base.py:441, in Future.result(self, timeout) 438 elif self._state == FINISHED: 439 return self.__get_result() --> 441 self._condition.wait(timeout) 443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: 444 raise CancelledError() File ~/opt/anaconda3/lib/python3.9/threading.py:312, in Condition.wait(self, timeout) 310 try: # restore state no matter what (e.g., KeyboardInterrupt) 311 if timeout is None: --> 312 waiter.acquire() 313 gotit = True 314 else: KeyboardInterrupt:
from sklearn.neighbors import KNeighborsClassifier
param_grid = {'n_neighbors':[1,10,30,50,80],'weights':['uniform','distance'],'metric':['euclidean','manhattan']}
knc = KNeighborsClassifier()
knc_grid = GridSearchCV(knc, param_grid=param_grid, n_jobs=-1)
knc_grid_results = perform_model(knc_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(knc_grid_results['Model'])
training the model ... Done..! --> training time -0:00:37.610952 Predicting test data Done..! --> testing time -0:00:03.292385 --> accuracy -0.9830047586675731 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 254 13 0 0 1] [ 0 11 273 0 0 0] [ 0 0 0 218 0 0] [ 0 0 0 0 196 0] [ 0 0 0 0 0 200]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.96 0.95 0.95 268
STANDING 0.95 0.96 0.96 284
WALKING 1.00 1.00 1.00 218
WALKING_DOWNSTAIRS 1.00 1.00 1.00 196
WALKING_UPSTAIRS 1.00 1.00 1.00 200
accuracy 0.98 1471
macro avg 0.98 0.98 0.98 1471
weighted avg 0.98 0.98 0.98 1471
--> Best Estimator:
KNeighborsClassifier(metric='manhattan', n_neighbors=1)
--> Best Parameter
Best Estimator Parameter :{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9811259168068247
Naive Bayes
from sklearn.naive_bayes import GaussianNB
param_grid = {'var_smoothing': np.logspace(0,-9, num=100)}
gnb = GaussianNB()
gnb_grid = GridSearchCV(gnb, param_grid=param_grid, n_jobs=-1)
gnb_grid_results = perform_model(gnb_grid, X_train, y_train, X_test, y_test, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(gnb_grid_results['Model'])
training the model ... Done..! --> training time -0:00:06.445757 Predicting test data Done..! --> testing time -0:00:00.012651 --> accuracy -0.8300475866757308 -------------Confusion Matirx--------------- [[301 0 0 0 0 4] [ 4 151 112 0 0 1] [ 0 2 281 0 0 1] [ 0 0 0 170 13 35] [ 0 0 0 18 138 40] [ 0 0 0 14 6 180]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 0.99 0.99 0.99 305
SITTING 0.99 0.56 0.72 268
STANDING 0.72 0.99 0.83 284
WALKING 0.84 0.78 0.81 218
WALKING_DOWNSTAIRS 0.88 0.70 0.78 196
WALKING_UPSTAIRS 0.69 0.90 0.78 200
accuracy 0.83 1471
macro avg 0.85 0.82 0.82 1471
weighted avg 0.86 0.83 0.83 1471
--> Best Estimator:
GaussianNB(var_smoothing=0.0657933224657568)
--> Best Parameter
Best Estimator Parameter :{'var_smoothing': 0.0657933224657568}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.8294467659621197
# ***************************** Print Accuracy and Error ******************************
print('\n Accuracy Error')
print(' ---------- --------')
print('Logistic Regression : {:.04}% {:.04}%'.format(log_reg_grid_results['accuracy'] * 100,\
100-(log_reg_grid_results['accuracy'] * 100)))
print('Linear SVC : {:.04}% {:.04}% '.format(lr_svc_grid_results['accuracy'] * 100,\
100-(lr_svc_grid_results['accuracy'] * 100)))
print('rbf SVM classifier : {:.04}% {:.04}% '.format(rbf_svm_grid_results['accuracy'] * 100,\
100-(rbf_svm_grid_results['accuracy'] * 100)))
print('DecisionTree : {:.04}% {:.04}% '.format(dt_grid_results['accuracy'] * 100,\
100-(dt_grid_results['accuracy'] * 100)))
print('Random Forest : {:.04}% {:.04}% '.format(rfc_grid_results['accuracy'] * 100,\
100-(rfc_grid_results['accuracy'] * 100)))
print('KNeighborsclssifier : {:.04}% {:.04}% '.format(knc_grid_results['accuracy'] * 100,\
100-(knc_grid_results['accuracy'] * 100)))
print('Naive Bayes : {:.04}% {:.04}% '.format(gnb_grid_results['accuracy'] * 100,\
100-(gnb_grid_results['accuracy'] * 100)))
Accuracy Error
---------- --------
Logistic Regression : 97.89% 2.107%
Linear SVC : 98.57% 1.428%
rbf SVM classifier : 98.98% 1.02%
DecisionTree : 94.02% 5.982%
Random Forest : 97.69% 2.311%
KNeighborsclssifier : 98.3% 1.7%
Naive Bayes : 83.0% 17.0%
kernal_evals = dict()
def evaluate_classification(model, name, X_train, X_test, y_train, y_test):
train_accuracy = metrics.accuracy_score(y_train, model.predict(X_train))
test_accuracy = metrics.accuracy_score(y_test, model.predict(X_test))
train_precision = metrics.precision_score(y_train, model.predict(X_train), average='micro')
test_precision = metrics.precision_score(y_test, model.predict(X_test), average='micro')
train_recall = metrics.recall_score(y_train, model.predict(X_train), average='micro')
test_recall = metrics.recall_score(y_test, model.predict(X_test), average='micro')
kernal_evals[str(name)] = [train_accuracy, test_accuracy, train_precision, test_precision, train_recall, test_recall]
print("Training Accuracy " + str(name) + " {} Test Accuracy ".format(train_accuracy*100) + str(name) + " {}".format(test_accuracy*100))
print("Training Precesion " + str(name) + " {} Test Precesion ".format(train_precision*100) + str(name) + " {}".format(test_precision*100))
print("Training Recall " + str(name) + " {} Test Recall ".format(train_recall*100) + str(name) + " {}".format(test_recall*100))
actual = y_test
predicted = model.predict(X_test)
confusion_matrix = metrics.confusion_matrix(actual, predicted)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'DOWNSTAIRS',
'UPSTAIRS'])
fig, ax = plt.subplots(figsize=(10,10))
ax.grid(False)
cm_display.plot(ax=ax)
le = preprocessing.LabelEncoder().fit(y_train)
y_xgb_train = le.transform(y_train)
y_xgb_test = le.transform(y_test)
xgb_params = {'n_estimators': 100,
'learning_rate': 0.2,
'subsample': 0.927,
'colsample_bytree': 0.88,
'max_depth': 5,
'booster': 'gbtree',
'reg_lambda': 38,
'reg_alpha': 32,
'random_state': 12}
xgb = XGBClassifier(**xgb_params).fit(X_train, y_xgb_train)
evaluate_classification(xgb, "XGBClassifier", X_train, X_test, y_xgb_train, y_xgb_test)
Training Accuracy XGBClassifier 98.84373405883353 Test Accuracy XGBClassifier 96.87287559483345 Training Precesion XGBClassifier 98.84373405883353 Test Precesion XGBClassifier 96.87287559483345 Training Recall XGBClassifier 98.84373405883353 Test Recall XGBClassifier 96.87287559483345
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_pca = LinearSVC(tol=0.00005)
lr_svc_pca_grid = GridSearchCV(lr_svc_pca, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_pca_results = perform_model(lr_svc_pca_grid, X_train_pca, y_train_pca, X_test_pca,
y_test_pca, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(lr_svc_grid_pca_results['Model'])
training the model ... Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
Done..! --> training time -0:00:04.541936 Predicting test data Done..! --> testing time -0:00:00.001423 --> accuracy -0.9809653297076818 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 251 17 0 0 0] [ 0 9 275 0 0 0] [ 0 0 0 218 0 0] [ 0 0 0 1 195 0] [ 0 0 0 1 0 199]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.97 0.94 0.95 268
STANDING 0.94 0.97 0.95 284
WALKING 0.99 1.00 1.00 218
WALKING_DOWNSTAIRS 1.00 0.99 1.00 196
WALKING_UPSTAIRS 1.00 0.99 1.00 200
accuracy 0.98 1471
macro avg 0.98 0.98 0.98 1471
weighted avg 0.98 0.98 0.98 1471
--> Best Estimator:
LinearSVC(C=2, tol=5e-05)
--> Best Parameter
Best Estimator Parameter :{'C': 2}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9841858408614081
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_f = LinearSVC(tol=0.00005)
lr_svc_f_grid = GridSearchCV(lr_svc_f, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_f_results = perform_model(lr_svc_f_grid, X_train_f, y_train_f, X_test_f, y_test_f, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(lr_svc_grid_f_results['Model'])
training the model ... Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
Done..! --> training time -0:00:11.356503 Predicting test data Done..! --> testing time -0:00:00.002339 --> accuracy -0.9605710401087696 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 4 228 35 0 0 1] [ 0 10 274 0 0 0] [ 0 0 0 215 0 3] [ 0 0 0 1 195 0] [ 0 0 0 3 1 196]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 0.99 1.00 0.99 305
SITTING 0.96 0.85 0.90 268
STANDING 0.89 0.96 0.92 284
WALKING 0.98 0.99 0.98 218
WALKING_DOWNSTAIRS 0.99 0.99 0.99 196
WALKING_UPSTAIRS 0.98 0.98 0.98 200
accuracy 0.96 1471
macro avg 0.96 0.96 0.96 1471
weighted avg 0.96 0.96 0.96 1471
--> Best Estimator:
LinearSVC(C=8, tol=5e-05)
--> Best Parameter
Best Estimator Parameter :{'C': 8}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.969222455337275
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_t = LinearSVC(tol=0.00005)
lr_svc_t_grid = GridSearchCV(lr_svc_t, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_t_results = perform_model(lr_svc_t_grid, X_train_t, y_train_t, X_test_t, y_test_t, class_labels=labels)
# observe the attributes of the model
print_grid_search_attributes(lr_svc_grid_t_results['Model'])
training the model ... Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
Done..! --> training time -0:00:04.743023 Predicting test data Done..! --> testing time -0:00:00.003701 --> accuracy -0.9870836165873556 -------------Confusion Matirx--------------- [[305 0 0 0 0 0] [ 0 255 13 0 0 0] [ 0 6 278 0 0 0] [ 0 0 0 218 0 0] [ 0 0 0 0 196 0] [ 0 0 0 0 0 200]]
--------------------| Classification Report |-------------------
precision recall f1-score support
LAYING 1.00 1.00 1.00 305
SITTING 0.98 0.95 0.96 268
STANDING 0.96 0.98 0.97 284
WALKING 1.00 1.00 1.00 218
WALKING_DOWNSTAIRS 1.00 1.00 1.00 196
WALKING_UPSTAIRS 1.00 1.00 1.00 200
accuracy 0.99 1471
macro avg 0.99 0.99 0.99 1471
weighted avg 0.99 0.99 0.99 1471
--> Best Estimator:
LinearSVC(C=0.5, tol=5e-05)
--> Best Parameter
Best Estimator Parameter :{'C': 0.5}
Number of CrossValidation sets:
Total number of cross validation sets: 5
--> Best Score:
Average Cross Validation score of best estimator: 0.9865670822279634
print('Linear SVC PCA : {:.04}% {:.04}% '.format(lr_svc_grid_pca_results['accuracy'] * 100,\
100-(lr_svc_grid_pca_results['accuracy'] * 100)))
print('Linear SVC Freqeuncy : {:.04}% {:.04}% '.format(lr_svc_grid_f_results['accuracy'] * 100,\
100-(lr_svc_grid_f_results['accuracy'] * 100)))
print('Linear SVC Time : {:.04}% {:.04}% '.format(lr_svc_grid_t_results['accuracy'] * 100,\
100-(lr_svc_grid_t_results['accuracy'] * 100)))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [12], in <cell line: 1>() ----> 1 print('Linear SVC PCA : {:.04}% {:.04}% '.format(lr_svc_grid_pca_results['accuracy'] * 100,\ 2 100-(lr_svc_grid_pca_results['accuracy'] * 100))) 4 print('Linear SVC Freqeuncy : {:.04}% {:.04}% '.format(lr_svc_grid_f_results['accuracy'] * 100,\ 5 100-(lr_svc_grid_f_results['accuracy'] * 100))) 7 print('Linear SVC Time : {:.04}% {:.04}% '.format(lr_svc_grid_t_results['accuracy'] * 100,\ 8 100-(lr_svc_grid_t_results['accuracy'] * 100))) NameError: name 'lr_svc_grid_pca_results' is not defined
barplot = pd.DataFrame({'Accuracy': [97.89,98.1,96.06,98.71],
'Error': [2.107,1.903,3.943,1.292]},
index=['Linear SVC','Linear SVC(PCA)','Linear SVC Freqyency','Linear SVC Time'])
barplot.plot(kind='bar', stacked=True, color=['skyblue', 'red'])
# Add Title and Labels
plt.title('Accuray & Error Scores %')
plt.ylabel('Accuracy')
Text(0, 0.5, 'Accuracy')